from eurostat_dataset import eurostat_dataset # packed in class the method to wrangle eurostat's table
import pandas as pd
import sqlite3
import plotly.express as px
import plotly.offline as pyo
# The result can be printed to html by nbconvert. For this:
# - pip install nbconvert -U in the environment
# - jupyter nbconvert Eurostat.ipynb --no-input --to html --no-prompt - in the terminal
# uncheck this comment to download dataset to the sqlite database
# eurostat_dataset(code='NRG_BAL_S').WriteToDatabase()
# eurostat_dataset(code='NRG_INF_EPCRW').WriteToDatabase()
# write to database, so you don't need to wait everytime when you restart the kernel or the file
#check the tables that had been written to the database:
def db_tables():
con = sqlite3.connect('ngr.db')
cursor = con.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())
con.close
# read table from db to DataFrame
def read_df_from_db(table):
con = sqlite3.connect('ngr.db')
cursor = con.cursor()
df = pd.read_sql_query("SELECT * FROM "+ table , con)
con.close
df.drop(columns=['index'],inplace=True)
return df
tables = db_tables()
print(f'The list of tables in the local sqlite database {tables}') # we can see the tables in the database 'ngr.db'
[('nrg_bal_s',), ('nrg_inf_epcrw',)]
The list of tables in the local sqlite database None
renew = eurostat_dataset(code='nrg_inf_epcrw').DatasetInfo()
dataset_info=eurostat_dataset(code='NRG_BAL_S').DatasetInfo()
print(f'The first dataset: "{dataset_info[0]}" dates from {dataset_info[1]} to {dataset_info[2]}.')
print(f'The second dataset: "{renew[0]}" dates from {renew[1]} to {renew[2]}.')
The first dataset: "Simplified energy balances" dates from 1990 to 2020. The second dataset: "Electricity production capacities for renewables and wastes" dates from 1990 to 2020.
#reading the table of the first dataset from local database
bal = read_df_from_db('nrg_bal_s')
balance=bal.copy()
balance = balance.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Final consumption - energy use" \
and geo not in ("Euro area - 19 countries (from 2015)","European Union - 27 countries (from 2020)")')
get_energy = read_df_from_db('nrg_inf_epcrw')
Final consumption - energy use (Gigawatt-hour)
balance.head()
| nrg_bal | siec | unit | geo | date | value | |
|---|---|---|---|---|---|---|
| 5778 | Final consumption - energy use | Total | Gigawatt-hour | Albania | 2020 | 21474.051 |
| 5779 | Final consumption - energy use | Total | Gigawatt-hour | Austria | 2020 | 288624.871 |
| 5780 | Final consumption - energy use | Total | Gigawatt-hour | Bosnia and Herzegovina | 2020 | 46804.808 |
| 5781 | Final consumption - energy use | Total | Gigawatt-hour | Belgium | 2020 | 360985.528 |
| 5782 | Final consumption - energy use | Total | Gigawatt-hour | Bulgaria | 2020 | 110633.154 |
balance.value=pd.to_numeric(balance.value)
balance.date=pd.to_numeric(balance.date)
#I like plotly. It is more interactive)
# https://plotly.com/python/plotly-express/#gallery
pyo.init_notebook_mode() #trying to render the plot in github, but it is warking only in nbviewer!https://nbviewer.org/
fig = px.line(balance, x="date", y="value", color="geo",title="Final consumption - energy use,Gigawatt-hour")
pyo.iplot(fig)
#get population dataset and clean
pop = eurostat_dataset('TPS00001').GetDf()
population=pop.copy()
population['value'] = population.value.astype('str').fillna('').str.extract(r"(\d+)|(\d+)\.")[0].astype('Int32')
population['date'] = pd.to_numeric(population.date)
#merge with balance tables
m_balance = balance.merge(population[["geo","date","value"]],on=['geo',"date"],suffixes=("", "_population"))
m_balance = m_balance.assign(per_capita=lambda x: x.value*1000/x.value_population)
pyo.init_notebook_mode()
fig = px.line(m_balance, x="date", y="per_capita", color="geo",
title="Final consumption - energy use per capita,Megawatt-hour")
pyo.iplot(fig)
get_energy.head()
| siec | plant_tec | unit | geo | date | value | |
|---|---|---|---|---|---|---|
| 0 | Solid biofuels | Net maximum electrical capacity | Megawatt | Albania | 2020 | 0.000 |
| 1 | Solid biofuels | Net maximum electrical capacity | Megawatt | Austria | 2020 | 816.199 |
| 2 | Solid biofuels | Net maximum electrical capacity | Megawatt | Bosnia and Herzegovina | 2020 | 1.120 |
| 3 | Solid biofuels | Net maximum electrical capacity | Megawatt | Belgium | 2020 | 563.500 |
| 4 | Solid biofuels | Net maximum electrical capacity | Megawatt | Bulgaria | 2020 | 15.064 |
"Gross electricity production"
el_production = bal.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Gross electricity production" \
and geo not in ("Euro area - 19 countries (from 2015)","European Union - 27 countries (from 2020)")')
# As for Latvia in 2020:
# - Energy consumption: 44172.893 Gigawatt-hour
# - electricity production - 5724.846 Gigawatt-hour. 13% from Energy consumption
# - electricity production from renewable - 3649.529 Gigawatt-hour. 63% from electricity production
#
get_energy_lv2020 = get_energy.query('geo=="Latvia" and date=="2020" and siec in ("Solid biofuels","Biogases","Hydro","Geothermal","Wind","Solar")')
get_energy_lv2020.value/get_energy_lv2020.value.sum()*100
25 5.262178 197 3.300042 240 86.890047 455 0.000000 498 4.268265 627 0.279468 Name: value, dtype: float64
el_production.query('geo=="Latvia" and date=="2020"')
| nrg_bal | siec | unit | geo | date | value | |
|---|---|---|---|---|---|---|
| 52254 | Gross electricity production | Total | Gigawatt-hour | Latvia | 2020 | 5724.846 |
bal.query('unit=="Gigawatt-hour" and nrg_bal=="Gross electricity production" \
and geo=="Latvia" and date=="2020"')
| nrg_bal | siec | unit | geo | date | value | |
|---|---|---|---|---|---|---|
| 51006 | Gross electricity production | Solid fossil fuels | Gigawatt-hour | Latvia | 2020 | 0.0 |
| 51132 | Gross electricity production | Manufactured gases | Gigawatt-hour | Latvia | 2020 | 0.0 |
| 51256 | Gross electricity production | Electricity | Gigawatt-hour | Latvia | 2020 | None |
| 51378 | Gross electricity production | Natural gas | Gigawatt-hour | Latvia | 2020 | 2075.163 |
| 51502 | Gross electricity production | Heat | Gigawatt-hour | Latvia | 2020 | None |
| 51624 | Gross electricity production | Nuclear heat | Gigawatt-hour | Latvia | 2020 | 0.0 |
| 51750 | Gross electricity production | Oil and petroleum products (excluding biofuel ... | Gigawatt-hour | Latvia | 2020 | 0.106 |
| 51876 | Gross electricity production | Peat and peat products | Gigawatt-hour | Latvia | 2020 | 0.048 |
| 52002 | Gross electricity production | Renewables and biofuels | Gigawatt-hour | Latvia | 2020 | 3649.529 |
| 52128 | Gross electricity production | Oil shale and oil sands | Gigawatt-hour | Latvia | 2020 | 0.0 |
| 52254 | Gross electricity production | Total | Gigawatt-hour | Latvia | 2020 | 5724.846 |
| 52380 | Gross electricity production | Non-renewable waste | Gigawatt-hour | Latvia | 2020 | 0.0 |